# All functions from dups package

#' Count the total number of duplicate observations in a dataset
#'
#' Tags every row with dups_tag() and adds up the rows whose combination of
#' id variables occurs more than once. Every copy is counted, so an id that
#' appears three times contributes three to the total.
#'
#' @param data data.frame or tibble
#' @param ... id variables
#'
#' @return A single integer: the number of rows that share their id
#'   variables with at least one other row.
#' @export
#'
#' @examples
#' df <- data.frame(id = c(1, 1, 2), value = c("a", "b", "c"))
#' dups_count(df, id)
dups_count <- function(data, ...) {
  tagged <- dups_tag(data, ...)

  # The tag column is a logical vector, so summing it counts the TRUE values.
  is_duplicate <- tagged[["dups_tag"]]
  sum(is_duplicate)
}

#' Drop duplicates
#'
#' Keeps one row per distinct combination of the id variables (the first one
#' encountered, as done by `dplyr::distinct()` with `.keep_all = TRUE`) and
#' optionally warns about how many rows were removed.
#'
#' @param data data.frame or tibble
#' @param ... id variables, forwarded to `dplyr::distinct()`
#' @param warn if `TRUE` (the default), raise a warning stating how many rows
#'   were dropped; no warning is raised when nothing was dropped
#'
#' @return `data` with the duplicated rows removed and all columns kept.
dups_drop <- function(data, ..., warn = TRUE) {
  data_new <- dplyr::distinct(data, ..., .keep_all = TRUE)

  n_dropped <- nrow(data) - nrow(data_new)

  # Stay silent when nothing was removed, matching dups_warn()/dups_stop(),
  # which only signal when duplicates actually exist. The message pieces are
  # passed straight to warning() so no extra separator spaces are inserted.
  if (warn && n_dropped > 0) {
    warning("Dropping ", n_dropped, " rows due to duplication", call. = FALSE)
  }

  data_new
}

#' Filter to look at only non-unique rows based on id variables
#'
#' Filter a dataset to only look at rows that are duplicated, useful for
#' diagnosing why there may be duplicates in the data and therefore how to
#' remove them.
#'
#' @param data data.frame or tibble
#' @param ... id variables
#'
#' @return The rows of `data` whose combination of id variables occurs more
#'   than once, with the grouping removed.
dups_filter <- function(data, ...) {
  by_key <- dplyr::group_by(data, ...)

  # n() is namespace-qualified so the function does not depend on dplyr being
  # attached or imported; this also matches dups_tag().
  duplicated_rows <- dplyr::filter(by_key, dplyr::n() > 1)

  dplyr::ungroup(duplicated_rows)
}

#' Print a duplicates report
#'
#' Prints a report of all the duplicates in a dataframe based on the uniquely
#' identifying variable(s). The report has one row per number of `copies`
#' (how many times a combination of id variables occurs), with
#' `observations` (the total rows having that many copies) and `surplus`
#' (the rows beyond the first copy of each combination).
#'
#' @param data data.frame or tibble
#' @param ... id variables
#' @param print_only if `TRUE` (the default), print the report and return
#'   `data` invisibly so the call can sit inside a pipeline; if `FALSE`,
#'   return the report itself
#'
#' @return `data` (invisibly) when `print_only` is `TRUE`, otherwise the
#'   report with columns `copies`, `observations` and `surplus`.
dups_report <- function(data, ..., print_only = TRUE) {
  # Count rows per key under a reserved column name. With the default name
  # `n`, an id variable that is itself called `n` would push the counts into
  # `nn` and the report would silently be built from the id values instead.
  key_counts <- dplyr::count(data, ..., name = ".dups_copies")

  # Number of keys for each distinct copy count.
  report <- dplyr::count(
    key_counts,
    copies = .data[[".dups_copies"]],
    name = "n"
  )
  report <- dplyr::mutate(
    report,
    observations = .data$n * .data$copies,
    surplus = .data$observations - (.data$observations / .data$copies)
  )
  report <- dplyr::select(report, -dplyr::all_of("n"))

  if (print_only) {
    print(report)
    invisible(data)
  } else {
    report
  }
}

#' Tag all observations that are duplicates
#'
#' Returns the dataframe with a new logical variable that denotes whether the
#' observation is a duplicate observation or not.
#'
#' @param data data.frame or tibble
#' @param ... id variables
#' @param var name of the logical column to add, `"dups_tag"` by default
#'
#' @return `data`, ungrouped, with an extra logical column named by `var`
#'   that is `TRUE` for every row whose combination of id variables occurs
#'   more than once.
dups_tag <- function(data, ..., var = "dups_tag") {
  by_key <- dplyr::group_by(data, ...)

  # A key shared by more than one row marks all of those rows as duplicates.
  flagged <- dplyr::mutate(by_key, "{var}" := dplyr::n() > 1)

  dplyr::ungroup(flagged)
}

#' View duplicate rows in the viewing portal
#'
#' Keeps only the rows whose combination of id variables occurs more than
#' once and opens them in a viewer.
#'
#' @param data data.frame or tibble
#' @param ... id variables
#' @param n_viewing number passed as `n` to `trackr::view_n()`, capped at the
#'   number of duplicated id combinations (presumably the number of groups to
#'   show; confirm against trackr). Use `NULL` to open every duplicated row
#'   with `tibble::view()` instead.
#'
#' @return `data`, invisibly, so the call can sit inside a pipeline.
dups_view <- function(data, ..., n_viewing = 200) {
  # The result stays grouped by the id variables so that the number of
  # duplicated keys can be counted below.
  # n() is namespace-qualified so the function does not depend on dplyr being
  # attached or imported; this also matches dups_tag().
  filtered_data <- dplyr::filter(
    dplyr::group_by(data, ...),
    dplyr::n() > 1
  )

  if (is.null(n_viewing)) {
    tibble::view(filtered_data)
  } else {
    # Never ask for more groups than there are.
    n_groups <- dplyr::n_groups(filtered_data)
    if (n_groups < n_viewing) n_viewing <- n_groups

    trackr::view_n(filtered_data, n = n_viewing)
  }

  invisible(data)
}

#' Warn if there are duplicates in a dataset
#'
#' For use within functions, warns the user if there are any duplicates in
#' the data.
#'
#' @param data data.frame or tibble
#' @param ... id variables
#'
#' @return `data`, invisibly, whether or not a warning was raised.
dups_warn <- function(data, ...) {
  duplicate_rows <- dups_count(data, ...)

  if (duplicate_rows > 0) {
    warning("There are ", duplicate_rows, " non-unique rows in the dataset")
  }

  invisible(data)
}


#' Stop if there are duplicates in a dataset
#'
#' For use within functions, stops operations and throws an error if there
#' are any duplicates in the data.
#'
#' @param data data.frame or tibble
#' @param ... id variables
#'
#' @return `data`, invisibly, when no duplicates are found; otherwise an
#'   error is thrown.
dups_stop <- function(data, ...) {
  duplicate_rows <- dups_count(data, ...)

  # Nothing duplicated: hand the data back untouched.
  if (duplicate_rows <= 0) {
    return(invisible(data))
  }

  stop("There are ", duplicate_rows, " non-unique rows in the dataset")
}